helper function to process time series

And some package and labeling info

library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)


timeseries_dir <- 'extracted_timeseries/'

metrics_write_dir <-  'extracted_timeseries/extracted_metrics/'


# get ecs ordering/labels
esm_labels <- read.csv(paste0(timeseries_dir,'global_tas_allesms.csv'), stringsAsFactors = FALSE) %>%
  select(esm) %>% distinct %>% 
  mutate(plotesm = paste0(letters[as.integer(row.names(.))], '.', esm),
         ECS_order = as.integer(row.names(.)))
print(esm_labels)
##              esm         plotesm ECS_order
## 1    CAMS-CSM1-0   a.CAMS-CSM1-0         1
## 2         MIROC6        b.MIROC6         2
## 3      GFDL-ESM4     c.GFDL-ESM4         3
## 4      FGOALS-g3     d.FGOALS-g3         4
## 5  MPI-ESM1-2-HR e.MPI-ESM1-2-HR         5
## 6  MPI-ESM1-2-LR f.MPI-ESM1-2-LR         6
## 7     MRI-ESM2-0    g.MRI-ESM2-0         7
## 8  ACCESS-ESM1-5 h.ACCESS-ESM1-5         8
## 9   IPSL-CM6A-LR  i.IPSL-CM6A-LR         9
## 10   CESM2-WACCM   j.CESM2-WACCM        10
## 11   UKESM1-0-LL   k.UKESM1-0-LL        11
## 12       CanESM5       l.CanESM5        12
process_time_series <- function(time_series_df, esm_label_info,
                                hist_start = 1995, hist_end = 2014){
  # Get ensemble member values for projection runs:
  # 1. 2080-2099 average
  # 2. loess detrend each ensemble member to get IAV
  #
  # split by run
  non_hist <- time_series_df %>% 
    filter(experiment != 'historical') %>% 
    select(year, ann_agg, esm, experiment, ensemble, variable, region)
  
  grouped <- split(non_hist, f = list(non_hist$esm, 
                                      non_hist$experiment,
                                      non_hist$ensemble, 
                                      non_hist$variable ,
                                      non_hist$region) )
  # split creates group of every possible combo of the variables and fills in
  # empty dataframes for the ones that don't exist in data. Drop those
  grouped <- grouped[lapply(grouped, nrow)>0]
  
  processed_groups <- lapply(grouped, FUN = function(run_df){
    loess_resids <- loess(run_df$ann_agg ~ run_df$year)$residuals
    
    run_df %>%
      filter(year >= 2080, year <= 2099) %>%
      group_by(esm, experiment, ensemble, variable, region) %>%
      summarise(average_2080_2099 = mean(ann_agg)) %>% 
      ungroup  %>%
      mutate(iasd = sd((loess_resids))) ->
      output_df
    
    return(output_df)
    
  })
  
  individual_stats <- do.call(bind_rows, processed_groups)
  rm(non_hist)
  rm(grouped)
  rm(processed_groups)
  
  # calculate ensemble averages
  individual_stats %>%
    group_by(esm, experiment, variable,region) %>%
    summarise(average_2080_2099 = mean(average_2080_2099),
              iasd = mean(iasd)) %>%
    ungroup ->
    ensemble_stats
  
  
  # get ensemble average historical average value:
  time_series_df %>%
    filter(experiment == 'historical',
           year >= hist_start,
           year <= hist_end) %>%
    group_by(esm, experiment, ensemble, variable, region) %>%
    summarise(historical_average = mean(ann_agg)) %>%
    ungroup %>%
    group_by(esm, experiment, variable, region) %>%
    summarise(historical_average = mean(historical_average)) %>%
    ungroup %>%
    select(-experiment) ->
    historical_ens
  
  
  # shape and calculate changes for plotting:
  ensemble_stats %>%
    left_join(historical_ens, by = c('esm', 'variable', 'region')) %>%
    left_join(esm_label_info, by = 'esm') %>%
    mutate(change = average_2080_2099 - historical_average,
           pct_change = 100*(average_2080_2099 - historical_average)/historical_average) ->
    plot_tbl
  
  # shape and calculate changes for plotting for each individ ensemble:
  individual_stats %>%
    left_join(historical_ens, by = c('esm', 'variable', 'region')) %>%
    left_join(esm_label_info, by = 'esm') %>%
    mutate(change = average_2080_2099 - historical_average,
           pct_change = 100*(average_2080_2099 - historical_average)/historical_average) ->
    plot_tbl_ind
  
return(list(plot_tbl, plot_tbl_ind))
}




prep_esm_TP_data_ipcc <- function(esmname, metric_write_dir = metrics_write_dir){
  
  region_timeseries <- read.csv(paste0(timeseries_dir, 'IPCC_land_regions_tas_', esmname, '_timeseries_1980_2099.csv'),
                              stringsAsFactors = FALSE) %>% mutate(region = acronym)
  
  region_tas_summary <- suppressMessages(process_time_series(time_series_df = region_timeseries, esm_label_info = esm_labels))

  
  region_pr_summary <- suppressMessages(process_time_series(time_series_df = 
                                                            read.csv(paste0(timeseries_dir, 'IPCC_land_regions_pr_', esmname,'_timeseries_1980_2099.csv'),
                                                                     stringsAsFactors = FALSE) %>%
                                                            rename(ann_agg=pr) %>% 
                                                            mutate(region = acronym) ,
                                                          esm_label_info = esm_labels))
  
  
  # reshape so each row is an observation
  # observation = esm - experiment - region - tas change-tas iasd - pr pct change
  region_tas_summary[[2]] %>% 
    select(esm, experiment, ensemble, region, iasd, change) %>%
    rename(tas_change = change) %>%
    left_join(region_pr_summary[[2]] %>%
                select(esm, experiment, ensemble, region, pct_change) %>% 
                rename(pr_pct = pct_change),
              by = c('esm', 'experiment', 'ensemble', 'region')) ->
    region_summary_ind
  
  write.csv(region_summary_ind, paste0(metric_write_dir, 'IPCC_land_regions_', esmname, '_indrun_tp_metrics.csv'), row.names = FALSE)

  
  
  # reshape so each row is an observation
  # observation = esm - experiment - region - tas change-tas iasd - pr pct change
  region_tas_summary[[1]] %>% 
    select(esm, experiment, region, iasd, change) %>%
    rename(tas_change = change) %>%
    left_join(region_pr_summary[[1]] %>%
                select(esm, experiment, region, pct_change) %>% 
                rename(pr_pct = pct_change),
              by = c('esm', 'experiment', 'region')) ->
    region_summary
  
  write.csv(region_summary, paste0(metric_write_dir, 'IPCC_land_regions_', esmname, '_ensavg_tp_metrics.csv'), row.names = FALSE)

return(region_summary)
  
}

Process and save all ESMs

Consider an observation = esm - experiment - region : tas change-tas iasd - pr pct change

# extract metrics for individ ensembles and ensemble averages and save as CSV 
# for each ESM, also bind ensemble averages together into a main holder and 
# also save that
region_summary_main <- data.frame()
for (esm in esm_labels$esm){
  region_summary <- prep_esm_TP_data_ipcc(esmname=esm, metric_write_dir = metrics_write_dir)
  
  bind_rows(region_summary_main,
            region_summary) ->
    region_summary_main
  
  rm(region_summary)
}

write.csv(region_summary_main,paste0(metrics_write_dir, 'IPCC_land_regions_allESMs_ensavg_tp_metrics.csv'), row.names = FALSE)
rm(region_summary_main)

Load ESM data that’s been processed

region_summary_main <- read.csv(paste0(metrics_write_dir, 'IPCC_land_regions_allESMs_ensavg_tp_metrics.csv'), 
                                stringsAsFactors = FALSE)
# make a copy to operate on
region_summary <- region_summary_main
print(head(region_summary))
##           esm experiment region      iasd tas_change     pr_pct
## 1 CAMS-CSM1-0     ssp119    ARP 0.3405867  0.4011041 -4.0602583
## 2 CAMS-CSM1-0     ssp119    CAF 0.3316650  0.2307977  0.1347643
## 3 CAMS-CSM1-0     ssp119    CAR 0.1914584  0.2074237  1.9541604
## 4 CAMS-CSM1-0     ssp119    CAU 0.4675852  0.4073631 -0.3602108
## 5 CAMS-CSM1-0     ssp119    CNA 0.5567985  0.4653044  3.3291197
## 6 CAMS-CSM1-0     ssp119    EAS 0.3397290  0.4744518  3.9221436

Spatial info

we want the shapes for plotting anyway, so prep them

library(sf)
## Linking to GEOS 3.10.2, GDAL 3.4.2, PROJ 8.2.1; sf_use_s2() is TRUE
shp <- st_read(dsn = 'IPCC-WGI-reference-regions-v4_shapefile/IPCC-WGI-reference-regions-v4.shp', stringsAsFactors = F)
## Reading layer `IPCC-WGI-reference-regions-v4' from data source 
##   `/Users/snyd535/Documents/GitHub/stitches_in_r/R/inst/shinyApp/python_curation/IPCC-WGI-reference-regions-v4_shapefile/IPCC-WGI-reference-regions-v4.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 58 features and 4 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -180 ymin: -90 xmax: 180 ymax: 90
## Geodetic CRS:  WGS 84
# add a numerical region id
shp %>% 
  mutate(region_id = as.integer(row.names(.))) ->
  shp

# add coordinate info probably
shp1 <-  st_transform(shp, "+proj=longlat +ellps=WGS84 +datum=WGS84")

# extract
coords <- as.data.frame(st_coordinates(shp1))


# get a mean lon and lat value in each shape
coords %>%
  rename(lon = X, lat = Y, region_id = L3) %>%
  left_join(as.data.frame(shp) %>% select(region_id, Acronym), by = 'region_id') %>%
  filter(grepl('PO', Acronym)) %>% 
  # have to have lon on 0:360 so th pacific ocean behaves even though not
  # looking at that here
  mutate(lon_360 = if_else(lon >=0, lon, lon+360))%>%
  group_by(region_id) %>%
  summarise(mean_lon = mean(lon_360),
            mean_lat = mean(lat)) %>%
  ungroup  %>%
  mutate(mean_lon = if_else(mean_lon >= 0 & mean_lon <= 180, 
                            mean_lon, mean_lon - 360) ) ->
  mean_pts_PO

coords %>%
  rename(lon = X, lat = Y, region_id = L3) %>%
  left_join(as.data.frame(shp) %>% select(region_id, Acronym), by = 'region_id') %>%
  filter(!grepl('PO', Acronym)) %>% 
  # have to have lon on 0:360 so th pacific ocean behaves even though not
  # looking at that here
  group_by(region_id) %>%
  summarise(mean_lon = mean(lon),
            mean_lat = mean(lat)) %>%
  ungroup  %>% 
  bind_rows(mean_pts_PO)->
  mean_pts 
# Join to the shape file and make sure this very simple way of
# doing things ends up with a lon lat that is actually in each region
shp %>%
  left_join(mean_pts, by = 'region_id') ->
  shp

ggplot() +
  geom_sf(data = shp  ) +
  geom_point(data = shp, mapping = aes(x = mean_lon, y = mean_lat), color = 'red') +
  geom_text(data = shp, mapping = aes(label = Acronym, x = mean_lon, y= mean_lat), size =5)

Convert ESM metrics to RGB and plot

we have 3 variables per observarion = experimentXregion -> convert to rgb values

  • looking across ESMs, we’ll want to make sure we have them all on consistent range before converting to (0,255)

  • each color/family of colors does have a physical interpretation in terms of iasd, t, p

  • red = temperature

  • blue = precip

  • iasd = green

# convert to RGB
region_summary %>%
  filter(experiment != 'ssp119',
         experiment != 'ssp434',
         experiment != 'ssp460',
         experiment != 'ssp534-over') ->
  region_summary 
region_summary$r <- scales::rescale(region_summary$tas_change, to =c(0,255))
region_summary$g <- scales::rescale(region_summary$iasd, to =c(0,255))
region_summary$b <- scales::rescale(region_summary$pr_pct, to =c(0,255))


# add spatial numerical info
region_summary %>%
  left_join(as.data.frame(shp) %>% select(Acronym, mean_lon, mean_lat), by = c('region' = 'Acronym')) %>%
  rename(lon  = mean_lon, lat = mean_lat) %>%
  # add the original colors 
  mutate(orig_color = rgb(.$r, .$g, .$b, maxColorValue  = 255),
         color_order = as.integer(row.names(.))) ->
  region_summary

region_summary %>%
  select(color_order, orig_color) %>%
  distinct() ->
  colors

region_numerical <- as.data.frame(region_summary[c('lon', 'lat', 'r', "g", "b")])

Plot directly

  • Green dominant = iasd dominates T and P

  • red dominant = T dominates iasd and P

  • blue dominant = P dominates T and IASD

  • black: magnitude of (r,g,b) overall small/near min value of T,P, IASD across SSPs and ESMs as set up

  • white: magnitude of (r,g,b) overall large/near max value of T,P, IASD across SSPs and ESMs as set up

MAJOR downside - don’t have a clear increasing vs decreasing interpretation with how it’s set up.

  • for temperature, smallest change is -0.03199914 deg C, so effectively only ever increasing Temp
  • IASD strictly >0
  • That leaves precip with no clear interpretable demarcation between increase and decrease; and b=127 is not the same as pr=0% change because range of values is not symmetrical about 0 in data plotting
shp %>% 
  left_join(region_summary, by = c('Acronym' = 'region')) %>%
  left_join(esm_labels %>% select(esm, plotesm), by = 'esm') ->
    shp_esms

p<- ggplot() +
    geom_sf(data = shp_esms %>% na.omit, aes(fill = as.factor(color_order)) ) +
  scale_fill_manual(values =colors$orig_color)+
  facet_grid(plotesm~experiment ) +
  theme(legend.position = 'none') 

ggsave('python_curation_figs/allESMs_rawdata.png', p, height =14, units = 'in')
## Saving 7 x 14 in image
p

# only decreasing precip
shp_esms %>%
  mutate(precip_change = if_else(pr_pct >=0, 'inc', 'dec'))->
  shp_esms2


ggplot() +
    geom_sf(data = shp_esms2 %>% na.omit, aes(fill = as.factor(color_order), color = precip_change )) +
  scale_fill_manual(values =colors$orig_color)+
  scale_color_manual(values = c('red', 'blue'))+
  facet_grid(plotesm~experiment ) +
  theme(legend.position = 'none') 

ggplot() +
    geom_sf(data = shp_esms2 %>% na.omit, aes(color = precip_change )) +
  scale_fill_manual(values =colors$orig_color)+
  scale_color_manual(values = c('red', 'blue'))+
  facet_grid(plotesm~experiment ) 

ggplot() +
    geom_sf(data = shp_esms2 %>% na.omit, aes(fill = as.factor(color_order), color = precip_change )) +
  scale_fill_manual(values =colors$orig_color)+
  scale_color_manual(values = c('yellow', 'blue'))+
  facet_grid(plotesm~experiment ) +
  theme(legend.position = 'none') 

ggplot() +
    geom_sf(data = shp_esms2 %>% na.omit, aes(color = precip_change )) +
  scale_fill_manual(values =colors$orig_color)+
  scale_color_manual(values = c('yellow', 'blue'))+
  facet_grid(plotesm~experiment ) 

# same thing but black is low magnitude therefore decreasing P, white is high and therefore inc P
ggplot() +
    geom_sf(data = shp_esms2 %>% na.omit, aes(fill = as.factor(color_order), color = precip_change )) +
  scale_fill_manual(values =colors$orig_color)+
  scale_color_manual(values = c('black', 'white'))+
  facet_grid(plotesm~experiment ) +
  theme(legend.position = 'none') 

ggplot() +
    geom_sf(data = shp_esms2 %>% na.omit, aes( color = precip_change )) +
  scale_color_manual(values = c('black', 'white'))+
  facet_grid(plotesm~experiment ) +
  theme(legend.position = 'none') 

3d plot to get a sense of the legend

library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
fig <- plot_ly(region_summary, x = ~tas_change, y = ~iasd, z = ~pr_pct, color = ~orig_color,
               colors = region_summary$orig_color)
fig
## No trace type specified:
##   Based on info supplied, a 'scatter3d' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter3d
## No scatter3d mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
fig <- plot_ly(region_summary %>% filter(experiment == 'ssp126'), x = ~tas_change, y = ~iasd, z = ~pr_pct, color = ~orig_color,
               colors = (region_summary %>% filter(experiment == 'ssp126'))$orig_color)
fig
## No trace type specified:
##   Based on info supplied, a 'scatter3d' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter3d
## No scatter3d mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
fig <- plot_ly(region_summary %>% filter(experiment == 'ssp245'), x = ~tas_change, y = ~iasd, z = ~pr_pct, color = ~orig_color,
               colors = (region_summary %>% filter(experiment == 'ssp245'))$orig_color)
fig
## No trace type specified:
##   Based on info supplied, a 'scatter3d' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter3d
## No scatter3d mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
fig <- plot_ly(region_summary %>% filter(experiment == 'ssp370'), x = ~tas_change, y = ~iasd, z = ~pr_pct, color = ~orig_color,
               colors = (region_summary %>% filter(experiment == 'ssp370'))$orig_color)
fig
## No trace type specified:
##   Based on info supplied, a 'scatter3d' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter3d
## No scatter3d mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
fig <- plot_ly(region_summary %>% filter(experiment == 'ssp585'), x = ~tas_change, y = ~iasd, z = ~pr_pct, color = ~orig_color,
               colors = (region_summary %>% filter(experiment == 'ssp585'))$orig_color)
fig
## No trace type specified:
##   Based on info supplied, a 'scatter3d' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter3d
## No scatter3d mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode

Notes/considerations